/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/**************************************************************************
 *
 * lxtrace : the command.  Turns tracing on/off and formats trace files.
 *
 *  "lxtrace {on hw[,hw[,hw...]] [trcFile [filesize [bufsize]]]      }
 *           {off                                                    }
 *           {format [-T] [-v] [-t formatFile] [-o outFile] [trcFile]}"
 *
 *           on   - begins tracing
 *           off  - stops tracing
 *           hw   - hookword (identifies traces to be captured)
 *                  This is a comma-separated list (up to LXTRACE_MAX_HW)
 *           trcFile  - path to the file where output is written
 *           filesize - maximum size (bytes) of the wrap-around file
 *                      A filesize of 0 will cause the file NOT to wrap
 *           bufsize  - the size of the buffer used by the device
 *                      to record trace records
 *
 *   For debug/testing purposes, the following additional options
 *   are defined:
 *   "lxtrace dump              // returns state information in a buffer
 *           fsync              // flush buffered records to the read buffer
 *
 * $Id: lxtrace.c,v 1.6 2001/12/10 22:05:30 schmuck Exp $
 *
 * $Log: lxtrace.c,v $
 * Revision 1.6  2001/12/10 22:05:30  schmuck
 * Previous fix missed an instance of fpos_t in a DBGASSERT.
 *
 * Revision 1.5  2001/12/10 19:23:07  schmuck
 * Avoid the use of fpos_t, since it's defined as a long on some systems and
 * a struct on others.  Instead of fgetpos, use ftell.
 *
 * Revision 1.4  2001/12/10 16:59:06  dcraft
 * fpos_t is a structure...not an int.
 *
 * Revision 1.3  2001/12/10 02:19:26  schmuck
 * Fix some more bugs in lxtrace format:
 * Got confused by \" or %% in format string.
 * Didn't correctly handle more than one %lld preceding an id2name parameter.
 * Only compared first 6 characters of id2name conversion names.
 * Also, fixed some gross inefficiencies; runs 6 times faster now.
 *
 * Revision 1.2  2001/12/07 22:24:36  schmuck
 * Fix bug in handling _id2name conversion in traces containing strings.
 * Add checks so lxtrace format doesn't blow up if trace data header is bad.
 *
 * Revision 1.1  2001/03/29 21:33:00  dixonbp
 * Convert block,dir, and lxtrace to .c files
 *
 * Revision 1.8  2001/01/08 21:20:47  wyllie
 * Remove compiler warning
 *
 * Revision 1.7  2001/01/04 15:30:31  kywang
 * Do not change the content of a constant string, it may get SIGSEGV.
 *
 * Revision 1.6  2000/12/18 13:53:18  gjertsen
 * More cleanup of comments/documentation.
 *
 * Revision 1.5  2000/12/15 13:56:45  gjertsen
 * Clean up documentation.
 *
 * Revision 1.4  2000/12/05 22:06:52  gjertsen
 * IA64 fixes for latest gnu toolchain w/ glibc2.2 and 2.4.0test10 kernel.
 * Replace vmlist macro calls with full calls to spinlock.
 *
 * Revision 1.3  2000/11/08 15:27:05  gjertsen
 * Further cleanup linux build warnings.
 *
 * Revision 1.2  2000/11/02 19:46:27  gjertsen
 * Linux code split. Pull out NBD stuff.
 *
 * Revision 1.1  2000/11/02  14:47:29  dixonbp
 * Rename ltrace to lxtrace (including related defines and structs) to
 * avoid confusion with the existing linux ltrace (library trace) command.
 *
// Revision 1.29  2000/10/28  01:14:08  wyllie
// Use uppercase hex in TRACEs and printfs consistently
//
// Revision 1.28  2000/10/27  03:00:37  tee
// Thread IDs greater or equal to 256 are really message handler IDs, so
// look them up using tscMsg_id2name instead of Thread_id2name.
//
 * Revision 1.27  2000/10/27 01:45:35  wyllie
 * Clean up ltrace: Fix _STrace of string arguments that are not the last
 * parameter.  Check for buffer overflow when building S or X type traces.
 * Shorten header in trace file by eliminating and combining fields.  Make
 * sure strings in trace file are null-terminated.  Round string lengths to
 * a multiple of the word size.
 *
// Revision 1.26  2000/10/25  16:53:37  wyllie
// Trace include reorganization: split old tasking/Trace.h into five new
// files, and move most of them into directories that will be shipped with
// GPFS on Linux.  Also change the code in trcid.h files generated by
// mktrace to not include AIX specific stuff like direct use of AIX trace
// macros.  Change names of all macros in generated code to have a leading
// underscore.  As a result of these changes, all trcid.h files had to
// be rebuilt.
//
// Revision 1.25  2000/10/16  21:21:45  dixonbp
// Missing parenthesis was causing bad id2name substitutions on format.
//
// Revision 1.24  2000/10/14  00:21:37  wyllie
// Count formatted records output; useful when running under gdb.
//
// Revision 1.23  2000/10/12  18:20:54  wyllie
// Use strncpy instead of memcpy to copy format strings to avoid SIGSEGV
// when strings are allocated near the end of the heap.
//
// Revision 1.22  2000/10/06  17:06:36  wyllie
// Update command syntax and options.  Made parameters to 'ltrace format' use
// letters (-t, -o, etc.) instead of being positional.  Add -v and -T options
// to change the formatting of the trace.  Made reasonable defaults for
// 'ltrace on' file names and sizes.  Always set buffer size in tracedev
// to avoid deadlocks due to mismatched sizes.
//
// Revision 1.21  2000/08/10  18:52:27  dixonbp
// Make sure messages go to stderr.
//
// Revision 1.20  2000/08/02  16:06:37  dixonbp
// Fix bug where id2name substitutions were being done as ip addresses
//
// Revision 1.19  2000/07/31  21:12:29  dcraft
// Define cxiDirent_t and move some defines out of gpfslinux.h
// to cxiTypes.h
//
// Revision 1.18  2000/07/25  16:18:11  gjertsen
// Add in function prototypes and a few type cast fixes.
//
// Revision 1.17  2000/07/21  18:40:42  dixonbp
// When formatting trace output, read the formatting file into
// memory and put it into a hash table based on hookword.
//
// Revision 1.16  2000/07/19  17:56:01  dixonbp
// Minor changes to some debug messages.
//
// Revision 1.15  2000/07/19  13:20:15  gjertsen
// Clean up code by adding typecasts, func prototypes, and misc compile fixes.
// Take care of module.h using `new' keyword for IA64.
//
// Revision 1.14  2000/07/13  21:40:19  dixonbp
// Fix a problem where format was not properly finding the next valid trace
// record after the point at which the trace file wrapped.
//
// Revision 1.13  2000/07/12  14:12:59  dixonbp
// Process trace records in chronological order when formatting a trace
// file that has wrapped.
//
// Revision 1.12  2000/07/10  21:09:30  schmuck
// Print timestamps as a decimal rather than hex (shows elapsed time in
// seconds since the first trace record, i.e., the first trace record
// always has a timestamp of zero).
//
// Revision 1.11  2000/06/29  21:07:28  dixonbp
// Fixes to formatting, better names for some of the structures to
// improve readability, and some cleanup of debug code.
//
// Revision 1.10  2000/06/29  19:50:27  dcraft
// print out a usage message and run cindent on this
//
// Revision 1.9  2000/06/13  14:49:13  dixonbp
// Don't bother looking up formatting records for XTrace generated records
// since they are already formatted.
//
// Revision 1.8  2000/06/07  19:40:05  dixonbp
// Support for string substitutions anywhere in a trace record.
//
// Revision 1.7  2000/06/06  20:03:19  dixonbp
// Minor fix.
//
// Revision 1.6  2000/05/30  21:27:48  wyllie
// Use cxi prefix instead of kxi.
//
// Revision 1.5  2000/05/26  23:41:11  wyllie
// Fix compile problems due to other changes.
//
// Revision 1.4  2000/05/15  22:25:15  wyllie
// Add CVS id and log entries.
//
 **************************************************************************/


#include <Shark-gpl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/ioctl.h>

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <signal.h>
#include <fcntl.h>
#include <ctype.h>

#include <Trace.h>
#include <lxtrace.h>

/* Convenience macros for printing error and debug messages.
   DP always prints to stderr.  DFP prints to stderr only when the file is
   compiled with LXTRACE_DEBUG defined; otherwise it expands to a no-op and
   its arguments are dropped by the preprocessor, so in non-debug builds they
   are neither compiled nor evaluated. */
#define DP(fmt,args...) fprintf(stderr, fmt, ## args)
#ifdef LXTRACE_DEBUG
#define DFP(fmt,args...) fprintf(stderr, fmt, ## args)
#else
#define DFP(fmt,args...) ((void)0)
#endif


/* Examples of formatted trace output:

Default options:

 Timestamp   Pid  COMPONENT_TAG: application trace record
----------- ----- ---------------------------------------
   0.000000 21499 TRACE_VNODE: gpfs_i_revalidate enter:

-v option:

 Timestamp   Pid  P Hookword COMPONENT_TAG: application trace record
----------- ----- - -------- ---------------------------------------
   0.000000 21499 1 307004B3 TRACE_VNODE: gpfs_i_revalidate enter:

-T option:

   Timestamp      Pid  COMPONENT_TAG: application trace record
---------------- ----- ---------------------------------------
970706704.711164 21499 TRACE_VNODE: gpfs_i_revalidate enter:

-T and -v options:

   Timestamp      Pid  P Hookword COMPONENT_TAG: application trace record
---------------- ----- - -------- ---------------------------------------
970706704.711164 21499 1 307004B3 TRACE_VNODE: gpfs_i_revalidate enter:

*/

/* Define fragments of header lines and the corresponding trace printf format
   strings for each field on a trace output line.  For every field, H1_xxx is
   the column title, H2_xxx the dashed underline and FMT_xxx the printf format
   for the value(s) in that column. */

/* Timestamp column: seconds and microseconds.  The _T variants are the wider
   form matching the "-T option" sample output above. */
#define H1_TS       " Timestamp  "
#define H2_TS       "----------- "
#define FMT_TS      "%4d.%06d "
#define H1_TS_T     "   Timestamp     "
#define H2_TS_T     "---------------- "
#define FMT_TS_T    "%9d.%06d "

/* Pid column.  Note: this column is 6 characters wide, one more than the
   5-character column shown in the sample output above. */
#define H1_PID      "  Pid  "
#define H2_PID      "------ "
#define FMT_PID     "%6d "

/* "P Hookword" columns: empty by default; the _V variants match the
   "-v option" sample output above. */
#define H1_P_HW     ""
#define H2_P_HW     ""
#define FMT_P_HW    ""
#define H1_P_HW_V   "P Hookword "
#define H2_P_HW_V   "- -------- "
#define FMT_P_HW_V  "%1d %08X "

/* Component tag column; the headers end the header line with a newline. */
#define H1_TAG      "COMPONENT_TAG: application trace record\n"
#define H2_TAG      "---------------------------------------\n"
#define FMT_TAG     "%s: "

/* Bytes in an "_id2name" formatting substitution */
#define TRC_MAX_ID2NAME 128

#ifdef __64BIT__
typedef long long argType;
#define ARGLEN sizeof(long long)
#define TRC_ARG(N) (((long long *)&application_data)[N])
#else
typedef int argType;
#define ARGLEN sizeof(int)
#define TRC_ARG(N) (((int *)&application_data)[N])
#endif // __64BIT__

#define TRC_ARGLIST argP[0], argP[1], argP[2], argP[3], argP[4], argP[5], \
                    argP[6], argP[7], argP[8], argP[9], argP[10], argP[11]


/* Offsets of the various fields in the formatting file.  Each record is one
   line; the fields start at these fixed character offsets within the line. */
#define GFR_TRCID 0
#define GFR_HOOKID 15
#define GFR_COMPID 20
#define GFR_FORMAT 40

/* Max length of COMPID in the formatting file (19 with the offsets above) */
# define MAX_COMPID_LEN (GFR_FORMAT - GFR_COMPID - 1)

/* Thread IDs greater than this are really message handlers */
#define FirstMsgHandlerThread 0x100

/* Size of the hash tables for in-memory format file and its hash functions.
   HOOK_HASH is for looking up format strings by hook id.
   I2N_HASH is for looking up id2name conversions by conversion name and id
   value; the I2NHASH hash function uses the first 4 chars of the conversion
   name, relying on the fact that these names are at least 4 chars long,
   because they all end in "_id2name".
   The masks in HOOKHASH and I2NHASH are the corresponding table size minus
   one, so every hash value is a valid table index.  I2NHASH reads the bytes
   at F through an int pointer. */
#define HOOK_HASH_TABLE_SIZE 0x10000
#define HOOKHASH(H) ((H) & 0xFFFF)
#define I2N_HASH_TABLE_SIZE 0x4000
#define I2NHASH(F,I) ((*(int*)(F) + (I)) & 0x3FFF)

/* Combined device and application trace data headers.  ReadNextTrcHeader
   reads one of these from the raw trace file as a single unit. */
typedef struct trc_hdrs_t
{
  trc_header_t hdr;  /* device header */
  trc_datahdr_t rec; /* application header */
} trc_hdrs_t;

/* id2name conversion specifier, built by ParseI2N from a "$n=xxx_id2name"
   token.  An entry with argNum 0 and i2nNameP NULL terminates the array
   that ParseI2N returns. */
typedef struct i2nSpec_t
{
  int argNum;       // number of the argument to be converted (the "n" in
                    // "$n=", adjusted by ParseI2N for preceding %ll
                    // arguments when __64BIT__ is not defined)
  char *i2nNameP;   // conversion function name (text following the '=')
} i2nSpec_t;

/* Each format record has one of these headers describing it.  The
   descriptors are filled in by BuildFormatTable; cidP and fmtP point into
   the in-memory copy of the formatting file rather than owning separate
   storage. */
typedef struct fmtDesc_t
{
  int       hookid;             // hookid or id2name value (low 16 bits only)
  char      *cidP;              // component identifier or id2name tag
  char      *fmtP;              // format string or id2name string
  unsigned int llmask;          // bitvector indicating %ll arguments
                                // (bit 0 = first conversion in fmtP)
  i2nSpec_t *i2nSpecP;          // array of id2name conversion specifiers,
                                // or NULL if the record has none
  struct fmtDesc_t *fmtNextP;   // next in the hash chain
} fmtDesc_t;

/* Hash tables for finding format records.  hookHashP is indexed by
   HOOKHASH(hookid) and i2nHashP by I2NHASH(tag, id); each slot heads a chain
   linked through fmtNextP.  Both tables are cleared and filled by
   BuildFormatTable. */
fmtDesc_t * hookHashP[HOOK_HASH_TABLE_SIZE];
fmtDesc_t * i2nHashP[I2N_HASH_TABLE_SIZE];

/* Hash lookup statistics */
static int nHookLookup = 0;  // calls to GetFormatRecord
static int nHookCheck = 0;   // chain entries examined by GetFormatRecord
static int nI2NLookup = 0;   // calls to GetID2NameRecord
static int nI2NCheck = 0;    // chain entries examined by GetID2NameRecord
static int nI2NFail = 0;     // GetID2NameRecord calls that found no match

/* static state information */
static int TraceHandle = -1;// file descriptor for the trace device
                            // (-1 while the device is not open)
static int out_fd;        // file descriptor for the (raw trace) output file
static int outSize;       // out_fd filesize limit; 0 means do not wrap
static int bufSize;       // size of the trace buffers (device and daemon)
static char *bufP;        // the daemon trace read buffer
static int nWraps;        // number of times the output file has wrapped
static off_t currentPos;  // position of next write to output file

/* The previous time is used to validate a wrap point when we
   think we've encountered one. */
static struct timeval prevTime;

static char *formatFileP;     // malloc'ed buffer holding formatting data
static char *fmtDescSpaceP;   // malloc'ed array of fmtDesc_t, one entry for
                              // each format record


/* Convert the ASCII representation of a hexadecimal number to an int.
   Parsing starts at s[0] and stops at the terminating NUL or at the first
   character that is not a hex digit (upper or lower case).  There is no
   handling of a "0x" prefix, a sign, or leading white space; a string that
   does not start with a hex digit yields 0.
   The value is accumulated in an unsigned int so that input with more
   digits than fit in an int wraps around instead of overflowing a signed
   int (which would be undefined behavior); the wrapped value is then
   converted to int. */
int axtoi(char s[])
{
  unsigned int value = 0;
  int i;

  for (i = 0; s[i] != '\0'; i++)
  {
    unsigned int digit;

    if (s[i] >= '0' && s[i] <= '9')
      digit = (unsigned int)(s[i] - '0');
    else if (s[i] >= 'A' && s[i] <= 'F')
      digit = (unsigned int)(s[i] - 'A' + 10);
    else if (s[i] >= 'a' && s[i] <= 'f')
      digit = (unsigned int)(s[i] - 'a' + 10);
    else
      break;  /* first non-hex character ends the number */

    value = 16 * value + digit;
  }
  return (int)value;
}

/* Parse a quoted format string.
   Starting at inP, skips an optional opening double quote and copies
   characters to outP until the closing double quote or the end of the
   string:
     - the escape sequences \n, \r and \t are converted to the corresponding
       control character; a backslash followed by any other character yields
       that character (so \" and \\ work as expected);
     - for every '%' conversion other than "%%" one bit position of *llmaskP
       is consumed, in order, starting at bit 0; the bit is set if the
       conversion (after an optional '+' or '-' and an optional decimal
       width) starts with "ll".  Conversions beyond the width of
       unsigned int are not recorded.
   Trailing white space is removed from the output, which is then
   NUL-terminated.
   It's ok to call this function to convert a string in-place (outP == inP),
   since the conversion only makes the string shorter (removes surrounding
   quotes and reduces escape sequences to a single char).
   Returns a pointer to the character in inP following the closing quote, or
   NULL if the end of the input was reached without finding a closing
   quote. */
char *ParseFormatString(char *inP, char *outP, unsigned int *llmaskP)
{
  char *initialOutP = outP;
  unsigned int b = 1;   /* mask bit for the next '%' conversion */

  /* skip initial quote */
  if (*inP == '"')
    inP++;

  /* copy string until ending quote */
  *llmaskP = 0;
  while (*inP && *inP != '"')
  {
    if (*inP == '\\' && *(inP + 1))
    {
      /* escape sequence: drop the backslash, translate the next char */
      inP++;
      switch (*inP)
      {
        case 'n': *outP++ = '\n'; inP++; break;
        case 'r': *outP++ = '\r'; inP++; break;
        case 't': *outP++ = '\t'; inP++; break;
        default:  *outP++ = *inP++; break;
      }
    }
    else if (*inP == '%')
    {
      *outP++ = *inP++;
      if (*inP == '%')
        *outP++ = *inP++;   /* literal "%%" does not consume an argument */
      else
      {
        /* optional sign flag and field width precede the length modifier */
        if (*inP == '+' || *inP == '-')
          *outP++ = *inP++;
        while (isdigit((unsigned char)*inP))
          *outP++ = *inP++;
        if (*inP == 'l' && *(inP+1) == 'l')
          *llmaskP |= b;
        /* b is unsigned, so it simply becomes 0 once all bits are used */
        b <<= 1;
      }
    }
    else
      *outP++ = *inP++;
  }

  /* remove trailing white space from the string */
  while (outP > initialOutP && isspace((unsigned char)outP[-1]))
    outP--;
  *outP = '\0';

  /* Return pointer to next character after the string or NULL if no ending
     quote was found */
  return *inP ? inP + 1 : NULL;
}


/* Parse a string containing a list of id2name conversion specifiers of the
   form "$n=xxx_id2name $m=yyy_id2name ...".  Returns a "null-terminted" array
   of i2nSpec_t structures, i.e., the array contains one more entry than the
   number of conversion specifiers found, and argNum/i2nNameP in the last
   array entry are set to 0/NULL. */
i2nSpec_t *ParseI2N(char *i2nP, unsigned int llmask)
{
  i2nSpec_t i2n[LXTRACE_MAX_FORMAT_SUBS];
  i2nSpec_t *i2nSpecP;
  char *p;
  int n, argNum, a, nll;
  unsigned int m;

  for (n = 0, p = strtok(i2nP, " \t\n");
       p && n < LXTRACE_MAX_FORMAT_SUBS;
       n++, p = strtok(NULL, " \t\n"))
  {
    if (*p != '$')
      break;
    p++;
    argNum = atoi(p);
    p = strchr(p, '=');
    if (argNum <= 0 || p == NULL)
      break;

#ifndef __64BIT__
    /* The "n" in "$n=" signifies that the "n"th parameter needs substitution.
       However, we need to account for any double-word ("%ll") values (will be
       taking two positions) in the arglist and adjust accordingly.  The
       llmask parameter indicates which arguments are of the %ll form.  Count
       the bits in llmask before the point of the substitution and add the
       count to the argNum value. */
    if (llmask)
    {
      nll = 0;
      for (a = 1, m = llmask;
           a < argNum && m;
           a++, m >>= 1)
      {
        if (m & 1)
          nll++;
      }
      argNum += nll;
    }
#endif // __64BIT__

    i2n[n].argNum = argNum;
    i2n[n].i2nNameP = p + 1;
  }
  if (n == 0)
    return NULL;  // ?? print msg about bad id2name specifiers

  i2nSpecP = (i2nSpec_t *)malloc((n + 1)*sizeof(i2nSpec_t));
  if (i2nSpecP != NULL)
  {
    memcpy(i2nSpecP, i2n, n*sizeof(i2nSpec_t));
    i2nSpecP[n].argNum = 0;
    i2nSpecP[n].i2nNameP = NULL;
  }
  return i2nSpecP;
}


/* Load the formatting file into memory and index it.

   The whole file is read, one record (line) at a time, into a single
   malloc'ed buffer (formatFileP).  Each record is parsed in place and a
   fmtDesc_t header is constructed for it in a second malloc'ed array
   (fmtDescSpaceP).  Records whose first field starts with "TRCID_" hold a
   format string and go into hookHashP; all other records hold an id2name
   conversion value and go into i2nHashP.  Both buffers stay allocated after
   the call because the hash tables point into them.

   Records too short to contain a format field, or whose component id field
   is blank, are skipped.

   Returns 0 on success, the errno value if the file size cannot be
   determined, ENOMEM if an allocation fails, or -1 if (with DBGASSERTS) a
   buffer overrun was detected. */
int BuildFormatTable(FILE *fP)
{
  char buf[2048];
  long fileSize;   /* size (bytes) of the formatting file */
  int  nRecords;   /* Number of records in the formatting file */
  int  hookid;     /* hookid of current record */
  int  hashVal;    /* hash value */
  size_t recLen;   /* length of current record as read by fgets */
  struct fmtDesc_t *nextDescP;
  char *curFmtRecordP, *nextFmtRecordP;
  char *trcidP;
  char *hookP;
  char *cidP;
  char *fmtP;
#ifdef DBGASSERTS
  char *formatFileEndP;
  char *fmtDescSpaceEndP;
#endif // DBGASSERTS

  /* Clear all entries in the hash table */
  memset(hookHashP, 0, sizeof(hookHashP));
  memset(i2nHashP,  0, sizeof(i2nHashP));

  rewind(fP);

  /* Count the number of formatting records */
  nRecords = 0;
  while (fgets(buf, sizeof(buf), fP) != NULL)
    nRecords += 1;

  /* Now that we are at the end of the file, see how big it is */
  fileSize = ftell(fP);
  if (fileSize < 0)
    return errno;

  /* Allocate the buffer for the file contents.  Two extra bytes are needed
     beyond the file data: fgets stores a terminating NUL after the last
     record, and an unquoted last record without a trailing newline grows by
     one character when "\n" is appended to its format string below. */
  if (NULL == (formatFileP = (char *)malloc(fileSize + 2)))
    return ENOMEM;

  /* Allocate space for the fmtDesc_t for all of the records (at least one,
     so that an empty file does not result in a zero-size request). */
  if (NULL == (fmtDescSpaceP =
               (char *)malloc((nRecords > 0 ? nRecords : 1) * sizeof(fmtDesc_t))))
  {
    free(formatFileP);
    formatFileP = NULL;
    return ENOMEM;
  }
  nextDescP = (fmtDesc_t *)fmtDescSpaceP;

#ifdef DBGASSERTS
  formatFileEndP = formatFileP + fileSize;
  fmtDescSpaceEndP = fmtDescSpaceP + (nRecords * sizeof(fmtDesc_t));
#endif // DBGASSERTS

  rewind(fP); /* back to the beginning of the file */

  /* Read each record in one-at-a-time, parse the record, and insert it into
     the right hash table.  The same line-length limit as in the counting
     pass is used so that both passes see the same number of records. */
  curFmtRecordP = formatFileP;
  nextFmtRecordP = formatFileP;
  while (fgets(curFmtRecordP, sizeof(buf), fP) != NULL)
  {
    recLen = strlen(curFmtRecordP);
    nextFmtRecordP = curFmtRecordP + recLen;

    /* A record that ends before the format field cannot be parsed: the
       fixed-offset pointers below would point past its end. */
    if (recLen < GFR_FORMAT)
    {
      curFmtRecordP = nextFmtRecordP;
      continue;
    }

    /* The TRCID identifier, hookid, component id, and format string are at
       fixed offsets */
    trcidP = curFmtRecordP + GFR_TRCID;
    hookP  = curFmtRecordP + GFR_HOOKID;
    cidP   = curFmtRecordP + GFR_COMPID;
    fmtP   = curFmtRecordP + GFR_FORMAT;

    /* parse the format record. */
    hookid = axtoi(hookP) & 0x0000FFFF;
    cidP = strtok(cidP, " ");
    if (cidP == NULL)
    {
      /* nothing but blanks from the component id field onward */
      curFmtRecordP = nextFmtRecordP;
      continue;
    }
    nextDescP->hookid = hookid;
    nextDescP->cidP = cidP;
    nextDescP->fmtP = fmtP;

    /* Convert the format string in place and find its end */
    fmtP = ParseFormatString(fmtP, fmtP, &nextDescP->llmask);

    /* check for id2name conversion specifiers following the format string */
    nextDescP->i2nSpecP = NULL;
    if (fmtP)
    {
      while (isspace((unsigned char)*fmtP))
        fmtP++;
      if (*fmtP)
        nextDescP->i2nSpecP = ParseI2N(fmtP, nextDescP->llmask);
    }

    /* Place the format record descriptor into the right hash table. */
    if (strncmp(trcidP, "TRCID_", 6) == 0)
    {
      /* This record contains a format string.  The in-place conversion
         removed at least the trailing newline or a quote, so there is room
         to append the newline. */
      strcat(nextDescP->fmtP, "\n");
      hashVal = HOOKHASH(hookid);
      nextDescP->fmtNextP = hookHashP[hashVal];
      hookHashP[hashVal] = nextDescP;
    }
    else
    {
      /* this record contains an id2name conversion value */
      hashVal = I2NHASH(nextDescP->cidP, hookid);
      nextDescP->fmtNextP = i2nHashP[hashVal];
      i2nHashP[hashVal] = nextDescP;
    }

    /* Advance to the next fmtDesc and format record */
    nextDescP++;
    curFmtRecordP = nextFmtRecordP;
  }

#ifdef DBGASSERTS
  /* verify that we didn't overrun our buffers */
  if ((char *)nextDescP > fmtDescSpaceEndP)
  {
    DP("lxtrace format: overwrote format descriptor memory!\n");
    return -1;
  }
  if (nextFmtRecordP > formatFileEndP)
  {
    DP("lxtrace format: overwrote format file memory!\n");
    return -1;
  }
#endif // DBGASSERTS

  return 0;
}


/* Find a format record by hookid.
   Searches the hookHashP chain selected by HOOKHASH(hookid) for a descriptor
   whose hookid matches.  On a match, stores the record's component id,
   format string, %ll bitvector and id2name specifier array in *cidPP,
   *fmtPP, *llmaskP and *i2nPP (these point at the table's own data; nothing
   is copied) and returns 0.  Returns -1, leaving the output parameters
   untouched, if there is no such record.  fP is only used in the debug
   trace. */
int GetFormatRecord(FILE *fP, int hookid, char **cidPP, char **fmtPP,
                    unsigned int *llmaskP, i2nSpec_t **i2nPP)
{
  fmtDesc_t *nextP;

  DFP("GetFormatRecord(%p, 0x%X, %p, %p)\n",
      (void *)fP, hookid, (void *)cidPP, (void *)fmtPP);

  nHookLookup++;

  /* search hash chain for the specified hookid */
  for (nextP = hookHashP[HOOKHASH(hookid)];
       nextP != NULL;
       nextP = nextP->fmtNextP)
  {
    nHookCheck++;

    /* Check whether hookid matches. */
    if (nextP->hookid == hookid)
    {
      /* Hookid matches.  Hand the record's fields back to the caller. */
      *cidPP = nextP->cidP;
      *fmtPP = nextP->fmtP;
      *llmaskP = nextP->llmask;
      *i2nPP = nextP->i2nSpecP;
      DFP("GetFormatRecord for id %X is: %s\n", hookid, nextP->fmtP);
      return 0;
    }
  }

  /* not found */
  return -1;
}


/* Find an id2name string by conversion name ("tag") and id value.
   Searches the i2nHashP chain selected by I2NHASH(tagP, id) for a
   descriptor whose value equals id and whose tag matches tagP in the first
   MAX_COMPID_LEN characters.  On a match, copies the id2name string into
   bufP (at most bufSize bytes, truncating if necessary but always
   NUL-terminated when bufSize > 0) and returns 0.  Returns -1, leaving bufP
   untouched, if no record matches.  fP is only used in the debug trace. */
int GetID2NameRecord(FILE *fP, char *tagP,
                     int id, int bufSize, char *bufP)
{
  fmtDesc_t *nextP;

  DFP("GetID2NameRecord(%p, %s, 0x%X, %d, %p)\n",
      (void *)fP, tagP, id, bufSize, (void *)bufP);

  nI2NLookup++;

  /* search hash chain for the specified hookid and tag */
  for (nextP = i2nHashP[I2NHASH(tagP, id)];
       nextP != NULL;
       nextP = nextP->fmtNextP)
  {
    nI2NCheck++;

    /* Check whether hookid and tag match. */
    if (nextP->hookid == id &&
        strncmp(nextP->cidP, tagP, MAX_COMPID_LEN) == 0)
    {
      DFP("GetID2NameRecord for id %X is: %s\n", id, nextP->fmtP);
      if (bufSize > 0)
      {
        /* strncpy does not terminate the destination when the source is
           bufSize characters or longer, so terminate it explicitly. */
        strncpy(bufP, nextP->fmtP, bufSize);
        bufP[bufSize - 1] = '\0';
      }
      return 0;
    }
  }

  /* not found */
  nI2NFail++;
  return -1;
}


/* Read the next trace header from the raw trace file.  Normally, this just
   involves the read for the specified file.  However, it also needs to
   check for wrapping of the file and make the necessary adjustments when we
   reach the wrap point.

   If the header just read does not carry LXTRACE_MAGIC, the file is
   rescanned one byte at a time, starting one byte past the bad header, until
   a header is found that has the magic value and a timestamp earlier than
   the previous valid record (prevTime).  That header is taken to be the wrap
   point: nWraps is incremented and the header is returned.

   prevTime is updated to the timestamp in *hdrP before returning whenever
   the initial read returned data.

   Returns the byte count of the last fread: 0 at end of file or when the
   rescan ran off the end of the file without finding a valid header.
   NOTE(review): after a short read part of *hdrP keeps its previous
   contents; callers presumably compare the result with sizeof(trc_hdrs_t)
   -- confirm. */
int ReadNextTrcHeader(FILE *raw_fp, trc_hdrs_t *hdrP)
{
  int nRead;
  off_t off1, off2;

  if (0 < (nRead = fread(hdrP, 1, sizeof(trc_hdrs_t), raw_fp)))
  {
    /* Successful read.  Validate the header. */
    if (hdrP->hdr.trMagic != LXTRACE_MAGIC)
    {
      /* Maybe we've reached the wrap point.  See if we can get oriented.
         Backup and try 1 byte further until we find a valid header.  This
         should be the wrap point.  If we can verify this, return the header.
       */
      fseek(raw_fp, 1-nRead, SEEK_CUR);
      off1 = ftell(raw_fp) - 1;  /* offset of the bad header */
      off2 = off1 + 1;           /* offset of the header tried next */
      while (0 < (nRead = fread(hdrP, 1, sizeof(trc_hdrs_t), raw_fp)))
      {
        /* Keep inching forward looking for a valid device header. */
        if ((hdrP->hdr.trMagic == LXTRACE_MAGIC) &&
             /* timestamps agree that we wrapped here */
            ((prevTime.tv_sec > hdrP->hdr.trTime.tv_sec) ||
             ((prevTime.tv_sec == hdrP->hdr.trTime.tv_sec) &&
              (prevTime.tv_usec > hdrP->hdr.trTime.tv_usec))))
        {
          /* We've found what appears to be a valid header. */
          prevTime = hdrP->hdr.trTime; /* timestamp of the last valid record */
          nWraps += 1;
          DFP("ReadNextTrcHeader: wrap point at %ld gap %ld\n",
              (long)off1, (long)(off2-off1));
          return nRead;
        }
        /* Step back to one byte past the start of the header just tried. */
        fseek(raw_fp, 1-nRead, SEEK_CUR);
        off2 = ftell(raw_fp);
      }
      DP("lxtrace format: ReadNextTrcHeader failed to find a valid record!\n");
    }

    prevTime = hdrP->hdr.trTime; /* timestamp of the last valid record */
  }
  return nRead;
}

/* ProcessBuffer reads from the trace device and writes to the output file.
   This is a function used by the trace daemon when handling SIGIO from the
   device or after fsync() as the device is being closed.

   One read of up to bufSize bytes is issued against TraceHandle.  On a read
   error a message is printed, both descriptors are closed and the process
   exits with status 1.  If data was read and the output is a wrapping file
   (out_fd > 2 and outSize > 0) that would grow past outSize, the file is
   first truncated at the current position and writing restarts at offset 0.
   The data is then written out completely, retrying after short writes and
   EINTR; currentPos is advanced by the number of bytes actually written.  A
   write error is reported and the rest of the buffer is dropped. */
void ProcessBuffer()
{
  int nBytes;
  int nWritten;
  ssize_t rc;

  /* Issue a read for one buffer to the trace device */
  DFP("trace daemon about to read %d bytes\n", bufSize);
  nBytes = read(TraceHandle, bufP, bufSize);
  DFP("trace daemon read %d bytes\n", nBytes);
  if (nBytes == -1)
  {
    DP("trace daemon read error %d\n", errno);
    close(TraceHandle);
    close(out_fd);
    _exit(1);
  }
  if (nBytes > 0)
  {
    /* Check the output file wrap-size and handle if it has been reached. */
    if ((out_fd > 2) && (outSize > 0) && ((currentPos + nBytes) > outSize))
    {
      /* Truncate any data that may have extended past this point from the last
         time we wrapped.  Seek back to the top of the file and continue. */
      DFP("trace daemon wrapping file at length %ld\n", (long)currentPos);
      if (ftruncate(out_fd, currentPos) != 0)
        DP("trace daemon ftruncate error %d\n", errno);
      if (lseek(out_fd, 0, SEEK_SET) == (off_t)-1)
        DP("trace daemon lseek error %d\n", errno);
      currentPos = 0;
      nWraps += 1;
    }

    /* Write the buffer to the trace output file.  write() may accept fewer
       bytes than requested, so keep going until everything is out. */
    DFP("trace daemon writing %d bytes at offset %ld\n",
        nBytes, (long)currentPos);
    nWritten = 0;
    while (nWritten < nBytes)
    {
      rc = write(out_fd, bufP + nWritten, nBytes - nWritten);
      if (rc < 0)
      {
        if (errno == EINTR)
          continue;   /* interrupted before anything was written: retry */
        DP("trace daemon write error %d\n", errno);
        break;
      }
      if (rc == 0)
        break;        /* no progress; avoid spinning */
      nWritten += (int)rc;
    }
    currentPos += nWritten;
  }
}

/* The daemon signal handler.  After initialization the daemon does nothing
   but wait for signals, so this is where its work happens:
     SIGIO         - a trace buffer is ready; move it from the device to the
                     output file and return to waiting.
     anything else - (SIGTERM or other) stop tracing: flush and drain the
                     device if it is open, report how often the output file
                     wrapped, close both descriptors, release the read buffer
                     and terminate the daemon with exit status 0. */
void SignalHandler(int signum)
{
  DFP("trace daemon got signal %d\n", signum);

  if (signum == SIGIO)
  {
    /* The main daemon function: transfer the data from the device to the
       output file. */
    ProcessBuffer();
    return;
  }

  /* Any other signal terminates the daemon and ends the trace. */
  if (TraceHandle > 0)
  {
    /* Get any records that are buffered in the device. */
    (void)fsync(TraceHandle);
    ProcessBuffer();

    if (nWraps != 0)
      DP("lxtrace: Trace file wrapped %d times.\n", nWraps);

    /* close everything */
    close(TraceHandle);
    close(out_fd);
  }

  if (bufP != NULL)
    free(bufP);

  /* daemon terminates */
  _exit(0);
}


/* Print the command syntax summary to stderr, close the trace device if its
   descriptor is set, and terminate the process with exit status 1.  Does not
   return. */
void Usage()
{
  DP("\n"
     "Usage: lxtrace { on on-args | off | format format-args | dump | fsync }\n"
     "\n"
     "  Command line format for lxtrace on:\n"
     "    lxtrace on hw1[,hw2[,hw3...hw8]] [trcFile [filesize [bufsize]]]\n"
     "      hw1, ...   List of hook values (3 hex digits) that have tracing enabled.\n"
     "      trcFile    Name of raw trace file to be created.  Default is\n"
     "                   /tmp/lxtrace.trc.\n"
     "      filesize   Optional maximum size of wrap-around trace file.  A value\n"
     "                   of 0 means that the trace file grows indefinitely.  Default\n"
     "                   is 16M.\n"
     "      bufsize    Optional size of each buffer.  Default is 64K.\n"
     "\n"
     "  Command line format for lxtrace format:\n"
     "    lxtrace format [-T] [-v] [-t formatFile] [-o outFile] [trcFile]\n"
     "      -T         Display timestamps as seconds since 1/1/1970.  Default\n"
     "                   is to display the first timestamp as 0.0 and other\n"
     "                   timestamps as elapsed time since the first trace record.\n"
     "      -v         Verbose mode.  Include CPU number and hook word in trace\n"
     "                  output.  Default is to not include these.\n"
     "      formatFile Name of file containing trace formats.  Default is\n"
     "                   /usr/lpp/mmfs/mmfs.trcfmt.\n"
     "      outFile    Name of file to receive formatted trace output.  Default\n"
     "                   is standard output.\n"
     "      trcFile    Name of input raw trace file.  Default is /tmp/lxtrace.trc.\n");

  /* -1 is the "not open" value of the trace device descriptor */
  if (-1 != TraceHandle)
    close(TraceHandle);

  exit(1);
}


/* The lxtrace command.  */
int main(int argc, char *argv[])
{
  int rc = 0;
  int nHooks;   // Number of hooks being enabled
  struct kArgs args; // command arguments
  int i;
  int nTrcHdrs;

  if (argc < 2)
    Usage();

  /* A new trace is being started.  Validate the options, open the files,
     start the daemon and configure the device. */
  if (strcmp(argv[1], "on") == 0)
  {
    int hw, dpid;
    struct sigaction action;
    char* trcFileNameP;

    /* Initialize local state fields. */
    out_fd = -1;
    bufP = NULL;
    outSize = DEF_TRC_FILESIZE;
    bufSize = DEF_TRC_BUFSIZE;
    currentPos = 0;
    nWraps = 0;

    if (argc < 3)
      Usage();

    /* Verify we have a comma separated string of hookwords. */
    rc = sscanf(argv[2], "%x,%x,%x,%x,%x,%x,%x,%x,%x",
                &hw,&hw,&hw,&hw,&hw,&hw,&hw,&hw,&hw);
    if ((rc <= 0) || (rc > LXTRACE_MAX_HW) ||
        (strlen(argv[2]) > LXTRACE_HW_STRING_LEN))
      Usage();
    nHooks = rc;

    /* Get output file name */
    if (argc >= 4)
      trcFileNameP = argv[3];
    else
      trcFileNameP = "/tmp/lxtrace.trc";

    /* Special cases of specifying stdout or stderr to receive the trace output. */
    if (strcmp(trcFileNameP, "stdout") == 0)
      out_fd = 1; //stdout;
    else if (strcmp(trcFileNameP, "stderr") == 0)
      out_fd = 2; //stderr;
    else
    {
      /* Open and trunc the output file. */
      if (0 > (out_fd = open(trcFileNameP, O_RDWR|O_CREAT|O_TRUNC)))
      {
        DP("lxtrace on: could not open file %s, errno %d\n", trcFileNameP, errno);
        rc = errno;
        goto exit_on;
      }
    }

    /* Save the output filesize.  If the size is explicitly given as zero,
       do not wrap. */
    if (argc >= 5)
    {
      outSize = atoi(argv[4]);
      if (((outSize < MIN_TRC_FILESIZE) || (outSize > MAX_TRC_FILESIZE)) &&
          (outSize != 0))
      {
        DP("Output file size must be 0 or between %d and %d\n",
            MIN_TRC_FILESIZE, MAX_TRC_FILESIZE);
        Usage();
      }
    }

    /* If specified, set the trace device buffer size (otherwise use default) */
    if (argc >= 6)
    {
      bufSize = atoi(argv[5]);
      if ((bufSize < MIN_TRC_BUFSIZE) || (bufSize > MAX_TRC_BUFSIZE))
      {
        DP("Buffer size must be between %d and %d\n",
            MIN_TRC_BUFSIZE, MAX_TRC_BUFSIZE);
        Usage();
      }
    }

    /* The output filesize must be at least as big as the selected buffer size. */
    if ((outSize != 0) && (outSize < bufSize))
    {
      DP("lxtrace: the filesize (%d) must be at least as large as the bufsize (%d)\n",
         outSize, bufSize);
      Usage();
    }

    /* The trace daemon does all the work.  It is created here ("lxtrace on") and
       lives until "trace off" sends it a SIGTERM signal. */
    dpid = fork();

    if (dpid < 0)
    {
      DP("lxtrace on: unable to fork trace daemon");
      rc = EAGAIN;
      goto exit;
    }

    else if (dpid != 0)
    { /* Parent */
      return(0);   /* parent's work is done. */
    }

    else
    { /* Child/Daemon */

      /* At this point, we are in the trace daemon.  Complete device
         configuration and then wait on its signals. */

      if (!(bufP = (char *)malloc(bufSize)))
      {
        DP("trace daemon: unable to malloc trace buffer");
        rc = ENOMEM;
        goto exit_on;
      }
      memset(bufP, 0, bufSize);     /* Force page faults to occur now */

      /* Initialize the signal handler */
      action.sa_handler = SignalHandler;
      sigfillset(&action.sa_mask);  /* block all signals while the handler is running */
      action.sa_flags = 0;

      if ((sigaction(SIGIO  , &action, NULL)) ||
          (sigaction(SIGTERM, &action, NULL)))
      {
        DP("trace daemon: unable to create signal handler\n");
        rc = EINVAL;
        goto exit_on;
      }

      /* Initialize the trace device */

      TraceHandle = open(TRC_DEVICE, O_RDWR);
      if (TraceHandle < 0)
      {
        /* Device does not exist, wrong type, or already open, ... */
        DP("trace daemon: device open failed with errno %d\n", errno);
        rc = ENODEV;
        goto exit_on;
      }

      /* Configure trace device buffer size. */
      args.arg1 = bufSize;
      rc = ioctl(TraceHandle, trc_bufSize, &args);
      if (rc < 0)
      {
        DP("trace daemon: device buffer request failed with errno %d\n", errno);
        rc = ENOMEM;
        goto exit_on;
      }

      /* Mark as ready for tracing by saving the trace classes to catch.
         argv[2] contains the list of hooks (e.g., 306,307,308,309).
         This becomes our first paramter to the ioctl trc_begin operation. */
      args.arg1 = nHooks;
      args.arg2 = (long)argv[2];
      rc = ioctl(TraceHandle, trc_begin, &args);
      if (rc < 0)
      {
        DP("trace daemon: device begin failed with errno %d\n", errno);
        rc = ENODEV;
        goto exit_on;
      }

      /* wait here forever (until SIGTERM is handled and exits) */
      while (1)
      {
        DFP("trace daemon about to wait\n");
        pause();
      }
      _exit(-1);
    } /* child/daemon */

exit_on:
    /* exit here for failures during daemon initialization. */
    DFP("trace daemon exitting\n");
    if (TraceHandle > 0)
      close(TraceHandle);
  }


  /* Stopping a previously activated trace by notifying to the device. */
  else if (strcmp(argv[1], "off") == 0)
  {
    int fd;

    fd = open(TRC_DEVICE, O_RDONLY);
    if (fd < 0)
    {
      DP("lxtrace off: device open failed rc %d errno %d\n", fd, errno);
      rc = ENODEV;
      goto exit;
    }

    /* trc_end terminates tracing. */
    args.arg1 = 0;
    rc = ioctl(fd, trc_end, &args);
    if (rc < 0)
    {
      DP("lxtrace off: device end failed with errno %d\n", errno);
      rc = EINVAL;
      goto exit_off;
    }

exit_off:
    /* When we close here, the daemon will be signalled to terminate. */
    close(fd);
  }


  /* Format the trace file. */
  else if (strcmp(argv[1], "format") == 0)
  {
    FILE *raw_fp; /* raw trace data file */
    FILE *fmt_fp; /* the application formatting file */
    FILE *out_fp; /* output file */
    Boolean hasWrapped; /* the raw trace file became full and wrapped */
    struct timeval tzero = { 0, 0 };
    trc_hdrs_t h;
    int useTOD = 0;    /* true if -T option given */
    int verbose = 0;   /* true if -v option given */
    char* formatFileNameP = "/usr/lpp/mmfs/mmfs.trcfmt";
    char* outputFileNameP = NULL;
    char* rawTraceFileNameP = "/tmp/lxtrace.trc";
    char *compP, *fmtP;
    unsigned int llmask;
    i2nSpec_t *i2nP;
    int nRecordsProcessed = 0;  /* useful when running under gdb */
    int i, id;
    Boolean msgHandler;

    /* work buffers */
    char application_data[LXTRACE_MAX_DATA];   /* raw trace data */

    /* Array of id2name substitutions to be made */
    typedef struct { char s[TRC_MAX_ID2NAME]; } idname_t;
    idname_t id2nameP[LXTRACE_MAX_FORMAT_SUBS];
    char fmtString[256];

    /* Array of pointers to the args for the fprintf that formats the output */
    argType argP[LXTRACE_MAX_FORMAT_SUBS];

    raw_fp = fmt_fp = out_fp = NULL;
    formatFileP = NULL;
    fmtDescSpaceP = NULL;

    /* Parse arguments */
    for (i = 2; i < argc ; i++)
    {
      if (strcmp(argv[i], "-T") == 0)
        useTOD = 1;
      else if (strcmp(argv[i], "-v") == 0)
        verbose = 1;
      else if (strcmp(argv[i], "-t") == 0  &&  i+1<argc)
      {
        i += 1;
        formatFileNameP = argv[i];
      }
      else if (strcmp(argv[i], "-o") == 0  &&  i+1<argc)
      {
        i += 1;
        outputFileNameP = argv[i];
      }
      else
        rawTraceFileNameP = argv[i];
    }

    /* Open the trace file. */
    if (NULL == (raw_fp = fopen(rawTraceFileNameP, "r")))
    {
      DP("lxtrace format: could not open file %s, errno %d\n",
         rawTraceFileNameP, errno);
      rc = errno;
      goto exit_format;
    }

    /* Open the formatting file. */
    if (NULL == (fmt_fp = fopen(formatFileNameP, "r")))
    {
      DP("lxtrace format: could not open file %s, errno %d\n", formatFileNameP, errno);
      rc = errno;
      goto exit_format;
    }

    if (0 != (rc = BuildFormatTable(fmt_fp)))
    {
      DP("lxtrace format: could not parse file: %s errno %d\n",
         formatFileNameP, errno);
      rc = errno;
      goto exit_format;
    }

    /* Open and truncate the output file. (defaults to stdout) */
    if (outputFileNameP == NULL)
      out_fp = stdout;
    else
    {
      out_fp = fopen(outputFileNameP, "w");
      if (out_fp == NULL)
      {
        DP("lxtrace format: could not open file %s, errno %d\n",
           outputFileNameP, errno);
        rc = errno;
        goto exit_format;
      }
    }

    /* Print first line of header and build trace format string */
    fmtString[0] = '\0';
    if (useTOD)
    {
      fprintf(out_fp, H1_TS_T);
      strcat(fmtString, FMT_TS_T);
    }
    else
    {
      fprintf(out_fp, H1_TS);
      strcat(fmtString, FMT_TS);
    }
    fprintf(out_fp, H1_PID);
    strcat(fmtString, FMT_PID);
    if (verbose)
    {
      fprintf(out_fp, H1_P_HW_V);
      strcat(fmtString, FMT_P_HW_V);
    }
    fprintf(out_fp, H1_TAG);
    strcat(fmtString, FMT_TAG);

    /* Print second line of header */
    if (useTOD)
      fprintf(out_fp, H2_TS_T);
    else
      fprintf(out_fp, H2_TS);
    fprintf(out_fp, H2_PID);
    if (verbose)
      fprintf(out_fp, H2_P_HW_V);
    fprintf(out_fp, H2_TAG);

    /* The trace file (raw_fp) is a wrap-around file.  We need to read it to
       see if it ever wrapped.  If it never wrapped, rewind and process it
       top-to-bottom.  If the file did wrap, set the file pointer to the oldest
       record and begin processing from there.  ReadNextTrcHeader detects the
       wrap point.  When formatting of a wrapped file reaches end-of-file, it
       must then go back and read starting at the beginning and stop when
       the wrap point is again reached. */

    hasWrapped = false; /* Signal format processing below we have a wrap file */
    nWraps = 0;         /* ReadNextTrcHeader counts wrap points it encounters.*/

    nTrcHdrs = 0;
    while (0 < (rc = ReadNextTrcHeader(raw_fp, &h)))
    {
      nTrcHdrs++;

      /* If ReadNextTrcHeader detects a wrap, leave and begin formating. */
      if (nWraps)
        break;

      /* skip over trace data */
      fseek(raw_fp, h.hdr.trLength - sizeof(trc_datahdr_t), SEEK_CUR);
    }

    DFP("wrap analysis: nTrcHdrs %d nWraps %d seekpos %d\n",
        nTrcHdrs, nWraps, ftell(raw_fp) - sizeof(trc_hdrs_t));

    /* If a wrap point was located, back-up so the header can be read again
       as formatting begins with this record.  If the file never wrapped,
       reset to the top of the file and process top-to-bottom. */
    if (nWraps)
    {
      fseek(raw_fp, -sizeof(trc_hdrs_t), SEEK_CUR);
      hasWrapped = true; /* Signal format procesing below to handle the wrap */
    }
    else
      rewind(raw_fp);

start_formatting:
    nWraps = 0; /* Reset the ReadNextTrcHeader wrap-found indicator. */

    /* Process raw trace records. Start by reading the device header. */
    while (0 < (rc = ReadNextTrcHeader(raw_fp, &h)))
    {
      DFP("hdr.trTime %X:%X hdr.trProcess %X hdr.trCPU %X hdr.trLength %X\n",
          h.hdr.trTime.tv_sec, h.hdr.trTime.tv_usec, h.hdr.trProcess,
          h.hdr.trCPU, h.hdr.trLength);

      /* Since we always start AFTER the wrap point, we should only see a
         wrap if we had reached the end of the file and then gone back to
         the beginning to get the records from there up to the wrap point.
         So, if we see this, we have formatted all the records and are done. */

      if (nWraps) // ReadNextTrcHeader detected and processed a wrap-point
      {
        DFP("lxtrace format: ending after processing records 0 to wrap-point\n");
        break;
      }

      DFP("rec.trHook 0x%X rec.trNArgs %d rec.trSPos %d rec.trSLen %d\n",
          h.rec.trHook, h.rec.trNArgs, h.rec.trSPos, h.rec.trSLen);

      if (h.hdr.trLength < sizeof(trc_datahdr_t))
      {
        DP("lxtrace format: bad record length in %s at offset %d, trLength %d\n",
           rawTraceFileNameP, ftell(raw_fp), h.hdr.trLength);
        rc = EINVAL;
        if (hasWrapped)
        {
          DP("lxtrace format: restart from wrap-point\n");
          fprintf(out_fp, "*** BAD trace record header ***\n");
          goto do_wrap;
        }
        goto exit_format;
      }

      /* application_data[] holds the raw trace data that follows the headers. */
      if (0 > (rc = fread(application_data, 1,
                          h.hdr.trLength - sizeof(trc_datahdr_t), raw_fp)))
      {
        DP("lxtrace format: could not read data from %s at offset %d, rc %d errno %d\n",
           rawTraceFileNameP, ftell(raw_fp), rc, errno);
        rc = errno;
        goto exit_format;
      }

      DFP("read %d bytes of application trace data\n", rc);

      /* If this is the first trace record, consider this time zero,
         unless the -T option was specified */
      if (tzero.tv_sec == 0  &&  !useTOD)
        tzero = h.hdr.trTime;

      /* Convert trace time to time since first trace record unless
         the -T option was specified */
      if (!useTOD)
      {
        h.hdr.trTime.tv_sec -= tzero.tv_sec;
        if (h.hdr.trTime.tv_usec >= tzero.tv_usec)
          h.hdr.trTime.tv_usec -= tzero.tv_usec;
        else
        {
          h.hdr.trTime.tv_sec--;
          h.hdr.trTime.tv_usec += 1000000 - tzero.tv_usec;
        }
      }

      /* Get the formatting record for the trace record with this hookid */
      if (0 != GetFormatRecord(fmt_fp, (int)(h.rec.trHook & 0x0000FFFF),
                               &compP, &fmtP, &llmask, &i2nP))
      {
        DP("lxtrace format: No formatting record found for hook %X\n",
           h.rec.trHook);
        fprintf(out_fp, "Unknown hook id 0x%08X\n", h.rec.trHook);
      }
      else
      {
        /* Found the formatting information for the current trace record. */

        /* Write leading information before the formatted trace. */
        if (verbose)
          fprintf(out_fp, fmtString,
                  h.hdr.trTime.tv_sec, h.hdr.trTime.tv_usec, h.hdr.trProcess,
                  h.hdr.trCPU, h.rec.trHook,
                  compP);
        else
          fprintf(out_fp, fmtString,
                  h.hdr.trTime.tv_sec, h.hdr.trTime.tv_usec, h.hdr.trProcess,
                  compP);

        DFP("format string: %s\n", fmtP);
        DFP("raw data: %X %X %X %X %X\n",
            TRC_ARG(0), TRC_ARG(1), TRC_ARG(2), TRC_ARG(3), TRC_ARG(4));

        /* Format-specific processing. */
        if (h.rec.trSPos == _TR_FORMAT_I)
        {
          /* Trace data consists of n integer values */
          for (i=0; i < h.rec.trNArgs; i++)
            argP[i] = TRC_ARG(i);
        }
        else if (h.rec.trSPos < LXTRACE_MAX_FORMAT_SUBS)
        {
          /* Trace data contains null terminated string at TRC_ARG(rec.trSPos);
             the length of the string is given by rec.trSLen (rounded up to a
             multiple of ARGLEN).  Get a pointer to the string, make sure it's
             null-terminated (so we don't blow up if the trace data is trash),
             and compute how many extra TRC_ARG words are occupied by the
             string. */
          // ?? should validate trSLen!
          char *s = (char *)&TRC_ARG(h.rec.trSPos);
          int xi = h.rec.trSLen/ARGLEN - 1;
          s[h.rec.trSLen - 1] = '\0';

          for (i = 0; i < h.rec.trSPos; i++)
            argP[i] = TRC_ARG(i);
          argP[i++] = (argType)s;
          for (; i <= h.rec.trNArgs; i++)
            argP[i] = TRC_ARG(i + xi);
        }
        else if (h.rec.trSPos == _TR_FORMAT_F)
        {
          /* First n words are integer values; a double value is stored at
             TRC_ARG(rec.trNArgs) */
          double *d = (double *)&TRC_ARG(h.rec.trNArgs);

          for (i = 0; i < h.rec.trNArgs; i++)
            argP[i] = TRC_ARG(i);
          memcpy(&argP[i], d, sizeof(double));
        }
        else if (h.rec.trSPos == _TR_FORMAT_X)
        {
          /* XTrace type (preformatted) record. */
          fprintf(out_fp, "%s", application_data);
          continue;
        }
        else
        {
          DP("lxtrace format: format %d not supported\n", h.rec.trSPos);
          fprintf(out_fp, "Unsupported format %d\n", h.rec.trSPos);
          goto exit_format;
        }

#if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__)
        /* On a 32 bit machine, 64 bit values are stored with the high-order
           word first, followed by the low-order word.  The problem is that
           the reverse is expected by printf formatting.  For each "%ll" in
           the formatting string, swap the words in the argP array so that
           the words are swapped into the expected order.  The llmask
           bitvector indicates which arguments are of the %ll form. */
        if (llmask)
        {
          int tmp;
          int argPos = 0;

          DFP("\nlong long formatting: %s\n", fmtP);

          /* Loop through the bits in llmask */
          while (llmask)
          {
            if (llmask & 1)
            {
              DFP("switching argP[%d] and argP[%d]\n", argPos, argPos+1);
              tmp = argP[argPos];
              argP[argPos] = argP[argPos+1];
              argP[argPos+1]  = tmp;
              argPos += 1; /* account for the extra arg for this 2-word sub */
            }
            argPos += 1;
            llmask >>= 1;
          }
        }
#endif // GPFS_LITTLE_ENDIAN and NOT __64BIT__

#ifdef DBGASSERTS
        if (i2nP == NULL &&
            (h.rec.trSPos==_TR_FORMAT_I || h.rec.trSPos==_TR_FORMAT_F) &&
            strstr(fmtP, "%s"))
        {
          DP("lxtrace format: hook %X is %s format record containing a string specification.\n",
             (h.rec.trHook&0x0000FFFF),
             (h.rec.trSPos==_TR_FORMAT_I) ? (char *) "an integer" :
               (h.rec.trSPos==_TR_FORMAT_F)? (char *) "a float":
               (char *) "an unknown");
          DP("               The formatting information for this record does not provide any substitutions.\n");
          fprintf(out_fp, "%s\n", fmtP);
          goto exit_format;
        }
#endif // DBGASSERTS

        /* Update the argP[] array as necessary to do the substitutions */
        if (i2nP != NULL)
        {
          /* This loop proceses each of the substitution strings */
          for (i = 0; i2nP[i].i2nNameP != NULL; i++)
          {
            int whichSub = i2nP[i].argNum - 1;
            char *argtP  = i2nP[i].i2nNameP;

            id = argP[whichSub];

            /* Thread_id2name conversion need special treatment */
            msgHandler = false;
            if (strcmp(argtP, "Thread_id2name") == 0)
            {
              /* Thread IDs >= FirstMsgHandlerThread are really message
                 handler IDs, so look for a tscMsg_id2name entry in that case.
                 Thread IDs < FirstMsgHandlerThread are "standard" thread IDs,
                 so look for StdThread_id2name in that case. */
              if (id >= FirstMsgHandlerThread)
              {
                /* Adjust id value and conversion function.  Set a flag to
                   remember to insert "Msg handler" in front of the name that
                   is found. */
                id -= FirstMsgHandlerThread;
                argtP = "tscMsg_id2name";
                msgHandler = true;
              }
              else
                argtP = "StdThread_id2name";
            }
            
            /* Now look-up the substitution for argtP (xx_id2name) that is
               associated with the appropriate traced value (id). */
            if (0 != GetID2NameRecord(fmt_fp, argtP, id, TRC_MAX_ID2NAME,
                                      id2nameP[whichSub].s))
            {
              /* Unable to find the formatting (substitution) information */

              /* ip addresses are formatted as such */
              if ((0 == strcmp(argtP, "nodeaddr_id2name"))  ||
                  (0 == strcmp(argtP, "ipaddr_id2name")) )
              {
                union { int i; unsigned char c[4]; } a;

                a.i = id;
                sprintf(id2nameP[whichSub].s, "%u.%u.%u.%u",
                        a.c[0], a.c[1], a.c[2], a.c[3]);
              }

              /* Some substitutions are just int to string conversions. */
              else if ((0 == strcmp(argtP, "lockstateHigh_id2name")) ||
                       (0 == strcmp(argtP, "lockstateLow_id2name")) ||
                       (0 == strcmp(argtP, "lockflags_id2name")))
              {
                sprintf(id2nameP[whichSub].s, "%X", id);
              }

              /* reason is optional so handle the resulting value of zero. */
              else if ((0 == strcmp(argtP, "reason_id2name")) &&
                       (id == 0))
                strcpy(id2nameP[whichSub].s, "no reason given");

              /* Any other case is a lack of proper formatting information
                 for the generated trace record.  Insert "unknown" into the
                 formatted output and continue. */
              else
              {
#ifdef DBGASSERTS
                DP("lxtrace format: %s substitution %d (value=%d) not found (used in hook %04X).\n",
                   argtP, whichSub+1, argP[whichSub], (h.rec.trHook&0x0000FFFF));
                // To debug one of these, exit here.
                // goto exit_format;
#endif // DBGASSERTS
                sprintf(id2nameP[whichSub].s, "unknown(%d)", argP[whichSub]);
              }
            }

            /* If we are converting the thread ID of a message handler,
               insert "Msg handler" in front of the name. */
            if (msgHandler)
            {
              char tmpBuff[TRC_MAX_ID2NAME];
              strncpy(tmpBuff, id2nameP[whichSub].s, sizeof(tmpBuff));
              snprintf(id2nameP[whichSub].s, TRC_MAX_ID2NAME,
                       "Msg handler %s", tmpBuff);
            }

            /* Alter the TRC_ARGLIST for this substituted parameter */
            argP[whichSub] = (argType)id2nameP[whichSub].s;
          } /* for each substitution string */

        } /* End-substitutions */

        /* Output the completed trace record */
        fprintf(out_fp, fmtP, TRC_ARGLIST);
        nRecordsProcessed += 1;

      } /* end processing this trace record */
    } /* while there are more unformatted trace records to process */

    if (hasWrapped)
    {
do_wrap:
      rewind(raw_fp); /* On a wrapped file, need to go back to the top. */
      hasWrapped = false;  /* Clear this so we don't do this more than once. */
      DFP("lxtrace format: done with wrap-point to EOF.  Going back to record 0\n");
      goto start_formatting;
    }

    rc = 0;

exit_format:
    /* print hash lookup statistics */
    DFP("Stats: hook lookup %6d check %7d (%.1f)\n",
        nHookLookup, nHookCheck, (double)nHookCheck/(double)nHookLookup);
    DFP("        i2n lookup %6d check %7d (%.1f) fail %d\n",
        nI2NLookup, nI2NCheck, (double)nI2NCheck/(double)nI2NLookup, nI2NFail);

    /* cleanup */
    if (formatFileP) free(formatFileP);
    if (fmtDescSpaceP) free(fmtDescSpaceP);
    if (raw_fp != NULL) fclose(raw_fp);
    if (fmt_fp != NULL) fclose(fmt_fp);
    if (out_fp != NULL) fclose(out_fp);
  } /* "format" option */


  /* Display the state information associated with the device. */
  else if (strcmp(argv[1], "dump") == 0)
  {
    int fd;
#define DUMP_LEN 512
    char dumpBufP[DUMP_LEN];

    fd = open(TRC_DEVICE, O_RDONLY);
    if (fd < 0)
    {
      DP("lxtrace dump: device open failed errno %d\n", errno);
      rc = ENODEV;
      goto exit;
    }

    /* Dump the trace device information */
    args.arg1 = DUMP_LEN;
    args.arg2 = (long)dumpBufP;

    if (0 != (rc = ioctl(fd, trc_dump, &args)))
    {
      DP("lxtrace dump: device dump returns rc %d errno %d \n", rc, errno);
    }
    else
    {
      printf("%s\n", dumpBufP);
    }

exit_dump:
    close(fd);
  }

  /* Flush any data that is currently buffered in the device. */
  else if (strcmp(argv[1], "fsync") == 0)
  {
    int fd;

    fd = open(TRC_DEVICE, O_RDONLY);
    if (fd < 0)
    {
      DP("lxtrace fsync: device open failed with errno %d\n", errno);
      rc = ENODEV;
      goto exit;
    }

    if (0 != (rc = fsync(fd)))
    {
      DP("lxtrace fsync: device fsync returns rc %d errno %d\n", rc, errno);
    }

exit_fsync:
    close(fd);
  }

  else
    Usage();

exit:
  return rc;
}


